
	frames reset
	scalar drop _all 
	matrix drop _all
	mata: mata clear

********************************************************************************
**********************         USER INPUT        *******************************
********************************************************************************

************ DATA PATH AND FILENAME ********************************************

	local datadir "${clean_data}"

*********** OUTPUT PATH ********************************************************

	local outputdir "${temp}"
	
********** SPECIFY depths *********************************************

	 local depths 1
	 
********** SPECIFY age bins for moving average *********************************

	local age_bin 2 
	
	** options for kernel are "uniform" or "triangular"
	
	local kernel "triangular"
	
*************** Set Type *******************************************************

	local types "singleyear"
	
************ setting list of income variables to use ***************************

	local  incvars ptotinc 

*************** SET AGE RANGE **************************************************

	local minage 20
	local maxage 70

************ CHOOSE COMPARISON OCCUPATIONS *************************************
	
** Which occupations? ex. - gl ocnums 4 6 - for chiropractors and pharmacists**
	*note : if `source' = "nppes" occnums cannot include 8, 9, or 10, which are ACS only.

	local occnums 1 2 3

/*
Dictionary:
1 "Physicians" 

2 "PCPs"

3 "Lawyers" 

*/

********************************************************************************
**********************      END USER INPUT    **********************************
********************************************************************************

** For filling in empty transitions
	*Note: Added just before collapsing transition counts

tempfile fill_in
local n = `maxage' - `minage' + 1
set obs `n'
gen age = `minage' + _n - 1
expand 13
egen counter = seq(), by(age)
gen initial = ""
replace initial = "00" if counter ==  1
replace initial = "01" if counter ==  2
replace initial = "10" if counter ==  3
replace initial = "20" if counter ==  4
replace initial = "30" if counter ==  5
replace initial = "40" if counter ==  6
replace initial = "50" if counter ==  7
replace initial = "60" if counter ==  8
replace initial = "70" if counter ==  9
replace initial = "80" if counter ==  10
replace initial = "90" if counter ==  11
replace initial = "95" if counter ==  12
replace initial = "99" if counter ==  13

rename initial subsequent
drop counter
expand 13
egen counter = seq(), by(age subsequent)
gen initial = ""
replace initial = "00" if counter ==  1
replace initial = "01" if counter ==  2
replace initial = "10" if counter ==  3
replace initial = "20" if counter ==  4
replace initial = "30" if counter ==  5
replace initial = "40" if counter ==  6
replace initial = "50" if counter ==  7
replace initial = "60" if counter ==  8
replace initial = "70" if counter ==  9
replace initial = "80" if counter ==  10
replace initial = "90" if counter ==  11
replace initial = "95" if counter ==  12
replace initial = "99" if counter ==  13
drop counter
gen transition = initial + subsequent
save `fill_in'

********************************************************************************

#delimit ;

local occs 

"
physicians 
pcps
lawyers 


"
;

#delimit cr

*********************** BUILDING TRANSITION MATRICES ***************************

foreach i in `occnums'{

		local o: word `i' of `occs'
		************** LOADING AND IMPLEMENTING SAMPLE RESTRICTIONS ****************
		local pcpflag = 0
		if "`o'" == "pcps"{
			local o "physicians"
			local pcpflag = 1
		}
		use "`datadir'/panel_`o'_clean", clear

		* Must have been between 30-60 at time of ACS 
		* Note that age_survey is missing for "NPPES only" observations
		* note that `o' is plural, but occupation variables are singular
		local o_singular = substr("`o'",1,strlen("`o'") - 1)


		if "`o'" == "physicians"{
			keep if `o_singular'_nppes == 1  
		}
		
		if `pcpflag' == 1 {
			keep if spec_category_code == 1
			local o "pcps"
		}

		* Including taxable income from `minage' to `maxage' years old 
		gen ageflag = inrange(age,`minage'-`age_bin',`maxage'+`age_bin' + 5)
		keep if ageflag == 1


		****************************************************************************

		egen newpersonid = group(personid)
		bysort newpersonid: gen sum_personids=_N


		tsset newpersonid year


		keep if inrange(year,2005,2017)


		keep `incvars' age year newpersonid

		
		quietly {

		foreach incvar of varlist `incvars'{


				foreach depth in `depths'{
					foreach f in "`types'"{
					* can't do multi-year if depth greater than 5, and redundant if depth == 1
						if ( ("`f'" != "multiyear") | (`depth' > 1 & `depth' <= 5) ){
							preserve
							
							if "`f'" == "multiyear" {
								
								** capturing discounted within-individual income profiles `depth'-years long
								
								gen select_age = age - `depth'*floor(age/`depth') == 0
								
								local max_subyear = `depth' - 1
								
								forvalues subyear = 1(1)`max_subyear'{
								
								replace `incvar' = `incvar' + f`subyear'.`incvar' if select_age == 1
								
								}
							
								*Data from ages not divisible by `depth' have been added into the npv at the previous age which is divisible by `depth'
								keep if select_age == 1
								
								
								drop select_age
							
							}
							
							gen incbin=""
							
							gen f_incbin=""

							gen f_`incvar'=f`depth'.`incvar' // Note that this now skips forward by `depth' years, which is the new age increment.
							
								/*	foreach oc in `occuplist'{ 		
									display `oc' 
									count if occupation == `oc'
									summ age if occupation == `oc'
									levelsof age if occupation == `oc'
									}
								*/
								
							
							summ age 
							scalar define agemin=r(min)
							scalar define agemax=r(max)
							
								forvalues a = `=agemin'/`=agemax' {
									if `a' >= `minage' + `depth'{
										replace f_incbin="99" if f_`incvar'>=r(r11) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="95" if f_`incvar'>=r(r10) & f_`incvar'<r(r11) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="90" if f_`incvar'>=r(r9) & f_`incvar'<r(r10) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="80" if f_`incvar'>=r(r8) & f_`incvar'<r(r9) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="70" if f_`incvar'>=r(r7) & f_`incvar'<r(r8) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="60" if f_`incvar'>=r(r6) & f_`incvar'<r(r7) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="50" if f_`incvar'>=r(r5) & f_`incvar'<r(r6) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="40" if f_`incvar'>=r(r4) & f_`incvar'<r(r5) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="30" if f_`incvar'>=r(r3) & f_`incvar'<r(r4) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="20" if f_`incvar'>=r(r2) & f_`incvar'<r(r3) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="10" if f_`incvar'>=r(r1) & f_`incvar'<r(r2) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="01" if f_`incvar'<r(r1) & f_`incvar'!=. & f_`incvar'>=0 &  age==`a' - `depth'
										replace f_incbin="00" if f_`incvar' == 0 &  age==`a' - `depth'
									}
								
									gquantiles `incvar' if  `incvar'>0 & inrange(age,`a'-`age_bin',`a'+`age_bin'), _pctile percentiles(10 20 30 40 50 60 70 80 90 95 99)
									
									replace incbin="99" if `incvar'>=r(r11) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="95" if `incvar'>=r(r10) & `incvar'<r(r11) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="90" if `incvar'>=r(r9) & `incvar'<r(r10) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="80" if `incvar'>=r(r8) & `incvar'<r(r9) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="70" if `incvar'>=r(r7) & `incvar'<r(r8) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="60" if `incvar'>=r(r6) & `incvar'<r(r7) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="50" if `incvar'>=r(r5) & `incvar'<r(r6) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="40" if `incvar'>=r(r4) & `incvar'<r(r5) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="30" if `incvar'>=r(r3) & `incvar'<r(r4) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="20" if `incvar'>=r(r2) & `incvar'<r(r3) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="10" if `incvar'>=r(r1) & `incvar'<r(r2) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="01" if `incvar'<r(r1) & `incvar'!=. & `incvar'>=0 &  age==`a'
									replace incbin="00" if `incvar' == 0 &  age==`a'
									
								}	
									
							
							gegen mean_`incvar' = mean(`incvar'), by(incbin age)

							*Drop last year with no next period centile

							drop if year==2017

							*Drop ages without complete sets

							drop if age<`minage' | age>`maxage'
							drop if incbin == ""
							
							gen transition= incbin + f_incbin
							
													
							*Calculating counts and mean incomes within each moving average age bin

							expand 2*`age_bin' + 1
							egen shiftvar = seq(), by(newpersonid year)
							replace shiftvar = shiftvar - (`age_bin' +1 )
							replace age = age + shiftvar
							drop if age<`minage' | age>`maxage'
							*uniform or triangular weights
							if "`kernel'" == "uniform"{
								gen wt = 1
							}
							if "`kernel'" == "triangular"{
								gen wt = 1 - abs(shiftvar/(`age_bin'+1))
							}
							*appending empty list of transitions and ages so that there will be no missing transition probabilities
							append using `fill_in'
							gen countvar = `incvar' < .
							replace wt = 1 if `incvar' == .
							gcollapse (rawsum) tr_count_raw = countvar (sum) tr_count = countvar (mean) meaninc = mean_`incvar' [iw = wt], by(age transition) fast
							
							gen initial=substr(transition,1,2)
							
							*filling in missing values
							gegen mean_`incvar' = mean(meaninc), by(initial age)
							drop meaninc 
							
							drop if substr(transition,3,1) == ""
							
							egen sum_initial=total(tr_count) , by(age initial)
							gen tr_prob=tr_count/sum_initial
						
	
							gen subsequent = substr(transition,3,2)
							
							*in rare cases no-one from an initial bin is observed `depth' years later
								*in this case assuming transition probability is 1 for same bin, and zero for all others
							replace tr_prob = 0 if sum_initial == 0 
							replace tr_prob = 1 if sum_initial == 0 & initial == subsequent
							replace mean_`incvar' = 0 if sum_initial == 0
							
							gen depth = `depth'
							gen f = "`f'"
							gen incvar = "`incvar'"
							gen occupation = "`o'"
							gen kernel = "`kernel'"
							save "`outputdir'/tm_`o'_`incvar'_`depth'years_`f'_`kernel'.dta", replace
							restore
						}
					}
				}
			
			
		}

	}
}

